import requests
import bs4
import json
import xlwt

# Module-level accumulator: getData() appends one dict per scraped TV series,
# and main() later dumps the whole list to the JSON and XLS output files.
allData = []


def fetchUrl(url, timeout=10):
    """Download ``url`` and return the response body as text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, so a single stalled
            connection would hang the whole crawl.

    Returns:
        The page content decoded with the encoding detected from the body.

    Raises:
        requests.exceptions.RequestException: On connection errors, timeouts,
            or (via ``raise_for_status``) a 4xx/5xx HTTP status.
    """
    headers = {
        'Accept': '*/*',
        # 'br' is intentionally not advertised: requests can only decode
        # Brotli when an optional brotli package is installed, and an
        # undecodable body would come back as garbage text.
        'Accept-Encoding': 'gzip, deflate',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:84.0) Gecko/20100101 Firefox/84.0',
    }
    r = requests.get(url, headers=headers, timeout=timeout)
    r.raise_for_status()
    # Use the encoding guessed from the content rather than the one declared
    # in the HTTP headers before decoding the body.
    r.encoding = r.apparent_encoding
    return r.text


def getData(url):
    """Scrape one listing page and append its TV series to ``allData``.

    Each ``<li>`` inside ``div.add-list-1 > ul`` is turned into a dict with
    the keys '电视剧名' (title), '集数' (episode info), '主演' (cast),
    '类型' (genre), '地区' (region) and '年份' (year). The last four come, in
    order, from the entry's ``<dl class="cf">`` groups; the link texts of
    each group are joined with '/'.

    Missing pieces are tolerated instead of raising: a page without the
    listing container contributes nothing, an entry without a title link is
    skipped, and any other absent field is stored as an empty string.

    Args:
        url: Address of the listing page to scrape.

    Returns:
        None. Results are appended to the module-level ``allData`` list.
    """
    html = fetchUrl(url)
    soup = bs4.BeautifulSoup(html, 'html.parser')

    container = soup.find('div', attrs={'class': 'add-list-1'})
    if container is None or container.ul is None:
        # No listing on this page; nothing to collect.
        return

    # Order matters: the n-th <dl class="cf"> of an entry holds the n-th field.
    detailKeys = ('主演', '类型', '地区', '年份')

    for item in container.ul.find_all('li'):
        titleTag = item.find('p', attrs={'class': 'tit'})
        if titleTag is None or titleTag.a is None:
            # Not a series entry (no title link), so there is nothing to record.
            continue
        episodeTag = item.find('p', attrs={'class': 'text_over'})

        details = []
        for group in item.find_all('dl', attrs={'class': 'cf'}):
            dd = group.find('dd')
            links = dd.find_all('a') if dd is not None else []
            # Skip empty link texts so no stray '/' separators appear.
            details.append('/'.join(link.text for link in links if link.text))
        # Pad so entries with fewer than four groups still get every key.
        details.extend([''] * (len(detailKeys) - len(details)))

        dataDict = {  # one dict per TV series
            '电视剧名': titleTag.a.text,
            '集数': episodeTag.text if episodeTag is not None else '',
        }
        # zip stops at four fields, ignoring any extra groups.
        dataDict.update(zip(detailKeys, details))
        allData.append(dataDict)

def main():
    """Crawl every listing page, then export the results as JSON and XLS.

    Pages 1 through 445 of the Sogou TV-series listing are scraped with
    ``getData`` (which fills the module-level ``allData``). The collected
    records are then written to '搜狗影视热门电视剧.json' and
    '搜狗影视热门电视剧.xls' in the current working directory.
    """
    lastPage = 445
    for page in range(1, lastPage + 1):
        url = 'https://kan.sogou.com/dianshiju/----{0}/'.format(page)
        print('第{}页'.format(page))
        getData(url)

    # Save as JSON; ensure_ascii=False keeps non-ASCII characters readable
    # instead of emitting \uXXXX escapes.
    with open('搜狗影视热门电视剧.json', 'w', encoding='utf-8') as f:
        json.dump(allData, f, ensure_ascii=False, indent=4)
    print('json文件保存成功')

    # Save as XLS: one workbook with a single sheet named 'python'.
    workbook = xlwt.Workbook()
    worksheet = workbook.add_sheet('python')
    title = ['电视剧名', '集数', '主演', '类型', '地区', '年份']  # header row
    for col, heading in enumerate(title):
        worksheet.write(0, col, heading)
    # Data rows start at row 1, below the header; columns follow `title`.
    for row, record in enumerate(allData, start=1):
        for col, heading in enumerate(title):
            worksheet.write(row, col, record[heading])
    # Write the file once, after all rows are filled in. Saving inside the
    # row loop rewrote the file for every record and skipped the save
    # entirely when there were no records.
    workbook.save('搜狗影视热门电视剧.xls')
    print('xls文件保存成功')


# Start the crawl. NOTE: there is no `if __name__ == "__main__"` guard, so
# this also runs when the module is imported.
main()
